import json
import time
from pprint import pprint

import requests

from qichamao.proxy_helper import get_proxies


def get_page(page, proxies, timeout=10):
    """Fetch one page of the cert-wall listing and return the response body.

    Args:
        page: Page number; sent as the ``page`` form field.
        proxies: Proxy mapping handed to ``requests.post``, or ``None`` to
            connect directly.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as UTF-8 when the status code is 200.
        An empty string when the status is anything else or when the
        request itself fails (connection error, bad proxy, timeout).
    """
    url = 'https://www.qichamao.com/cert-wall'
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
    }
    form_data = {"page": str(page), "pagesize": "9"}
    try:
        # proxies=None is the requests default, so a single call covers both
        # the direct and the proxied case.
        response = requests.post(
            url,
            headers=headers,
            data=form_data,
            proxies=proxies,
            timeout=timeout,
        )
    except requests.RequestException:
        # Report a failed request the same way as a non-200 response so the
        # caller can treat both as "fetch failed".
        return ''
    if response.status_code != 200:
        return ''
    # response.content is the raw byte stream; decode it explicitly as UTF-8.
    return response.content.decode('utf-8')


def parse_page(html):
    """Print the company name of every entry in a JSON listing response.

    Args:
        html: Response body as a JSON string. Entries are read from its
            top-level ``dataList`` key, and each entry's ``CompanyName``
            value is pretty-printed to stdout.

    Raises:
        json.JSONDecodeError: If ``html`` is not valid JSON.
        KeyError: If an entry has no ``CompanyName`` key.
    """
    payload = json.loads(html)
    # A missing or null ``dataList`` is treated as an empty page instead of
    # aborting the whole crawl.
    for entry in payload.get('dataList') or []:
        pprint(entry['CompanyName'])

def main():
    """Crawl listing pages 2 through 299 and print the company names found.

    Each page is fetched with ``get_page``. When the body does not contain
    the ``{"isSuccess":true`` marker, the fetch is retried after a one-second
    pause using a fresh value from ``get_proxies()``. A page that still fails
    after ``max_retries`` retries is skipped so the crawl cannot loop forever.
    """
    max_retries = 5
    proxies = None
    for page in range(2, 300):
        print('page:', page)
        # Reassigning the loop variable of a ``for ... in range`` loop does
        # not rewind the iteration, so retries need their own inner loop.
        for _ in range(max_retries + 1):
            html = get_page(page, proxies)
            if '{"isSuccess":true' in html:
                parse_page(html)
                break
            time.sleep(1)
            proxies = get_proxies()  # switch to a proxy IP
            print('使用代理ip：', proxies)
        else:
            # Every attempt failed; move on to the next page.
            print('skip page:', page)


# Start the crawl only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
